{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 用训练好的参数训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#导入必要的包\n",
    "import pandas as pd\n",
    "import xgboost as xgb\n",
    "from xgboost import XGBClassifier\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#读取数据\n",
    "train = pd.read_csv(\"./RentListingInquries_FE_train.csv\")\n",
    "test = pd.read_csv(\"./RentListingInquries_FE_test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = train[:2000]\n",
    "test = test[:1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_train = train['interest_level']\n",
    "X_train = train.drop(['interest_level'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2000 entries, 0 to 1999\n",
      "Columns: 227 entries, bathrooms to work\n",
      "dtypes: float64(9), int64(218)\n",
      "memory usage: 3.5 MB\n"
     ]
    }
   ],
   "source": [
    "X_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Columns: 227 entries, bathrooms to work\n",
      "dtypes: float64(9), int64(218)\n",
      "memory usage: 1.7 MB\n"
     ]
    }
   ],
   "source": [
    "test.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#数据标准化\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "ss_X = StandardScaler()\n",
    "X_train = ss_X.fit_transform(X_train)\n",
    "test = ss_X.transform(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#使用最佳参数\n",
    "xgb = XGBClassifier(\n",
    " learning_rate =0.1,\n",
    "    n_estimators=72,  \n",
    "    max_depth=3,\n",
    "    min_child_weight=3,\n",
    "    gamma=0,\n",
    "    subsample=0.6,\n",
    "    colsample_bytree=0.8,\n",
    "    colsample_bylevel = 0.7,\n",
    "    objective= 'multi:softprob',\n",
    "    seed=3,\n",
    "    reg_alpha=1.5,\n",
    "    reg_lambda = 0.5\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=0.7,\n",
       "       colsample_bytree=0.8, gamma=0, learning_rate=0.1, max_delta_step=0,\n",
       "       max_depth=3, min_child_weight=3, missing=None, n_estimators=72,\n",
       "       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,\n",
       "       reg_alpha=1.5, reg_lambda=0.5, scale_pos_weight=1, seed=3,\n",
       "       silent=True, subsample=0.6)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb.fit(X_train,y_train,eval_metric='mlogloss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_predict = xgb.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 2, 2, ..., 1, 2, 2], dtype=int64)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
